library(Seurat)
library(ggplot2)
library(RColorBrewer)
library(writexl)
library(tidyverse)
# Qualitative palette used for the cluster plots: 8 colours from each of
# "Paired", "Set3", "Set2" and "Accent", then 9 from "Pastel1" (41 in total),
# concatenated in that order.
color <- unlist(
  Map(
    brewer.pal,
    n = c(8, 8, 8, 8, 9),
    name = c("Paired", "Set3", "Set2", "Accent", "Pastel1")
  ),
  use.names = FALSE
)
# Inputs: the QC-filtered batch-2 object, the QC-filtered `spe` object for
# batch 1, and the raw batch-1 Seurat object.
data2 <- readRDS(file = "/Users/xu/Desktop/CART/data/qcdata/batch2_afterqcdata.rds")
spe <- readRDS(file = "/Users/xu/Desktop/CART/data/qcdata/2025-02-21_sfe_qc.rds")
seurat <- readRDS(file = "/Users/xu/Desktop/CART/data/seuratrawdata/batch1_CARTXenium_seurat_250218130928.rds")

# Number of cells (rows of colData) in `spe`.
dim(spe@colData)
## [1] 50042    21

# Restrict the raw batch-1 object to the cells present in `spe`.
cellid <- rownames(spe@colData)
data1 <- subset(seurat, cells = cellid)

# Combine the two batches into one object. Batch labels are assigned by
# position: the first ncol(data1) cells are batch 1, the rest batch 2.
batch_sizes <- c(ncol(data1), ncol(data2))
merged_obj <- merge(x = data1, y = data2)
merged_obj$batch <- rep(c("batch1", "batch2"), times = batch_sizes)

# Collapse the per-batch layers, then normalise with a scale factor of 300.
merged_obj <- merged_obj %>%
  JoinLayers() %>%
  NormalizeData(scale.factor = 300)

# Scale and run PCA on every gene; scaling is split by SampleId.
all.genes <- rownames(merged_obj)
merged_obj <- ScaleData(
  merged_obj,
  features = all.genes,
  split.by = merged_obj@meta.data$SampleId
)
merged_obj <- RunPCA(merged_obj, features = all.genes)
ElbowPlot(merged_obj, ndims = 50, reduction = "pca")

# Neighbour graph, clustering (algorithm = 4, resolution 1) and UMAP, all on
# the first 30 principal components.
merged_obj <- merged_obj %>%
  FindNeighbors(dims = 1:30) %>%
  FindClusters(resolution = 1, algorithm = 4) %>%
  RunUMAP(dims = 1:30)

DimPlot(
  merged_obj,
  reduction = "umap",
  pt.size = 0.25,
  alpha = 0.2,
  label = TRUE,
  label.size = 2,
  repel = TRUE,
  cols = color
)

# Create one object per top-level cluster, named c<cluster id> (c1, c2, ...),
# so that mixed clusters can be re-clustered individually below.
cluster_labels <- as.character(unique(merged_obj$seurat_clusters))
for (cluster_id in cluster_labels) {
  cluster_subset <- subset(merged_obj, subset = seurat_clusters == cluster_id)
  assign(x = paste0("c", cluster_id), value = cluster_subset)
}

# Re-cluster one per-cluster subset and label its sub-clusters.
#
# obj        : Seurat object holding the cells of one top-level cluster.
# labels     : named list; each name is a cell-type label and each value the
#              sub-cluster ids that receive it.
# resolution : resolution passed to FindClusters().
#
# Returns `obj` with refreshed `seurat_clusters` and an `annotation` column.
# Cells in sub-clusters not listed in `labels` are left as "Unknown".
annotate_subclusters <- function(obj, labels, resolution = 0.8) {
  obj <- FindClusters(obj, resolution = resolution)
  annotation <- rep("Unknown", ncol(obj))
  for (cell_type in names(labels)) {
    annotation[obj$seurat_clusters %in% labels[[cell_type]]] <- cell_type
  }
  obj$annotation <- annotation
  obj
}

# The sub-cluster ids below were chosen against the recorded run; the cell and
# community counts quoted are the ones FindClusters() reported in that run.

# Cluster 6: 3834 cells, 9 communities.
c6 <- annotate_subclusters(c6, list(
  "fibroblast" = c(4),
  "macrophage" = c(3, 5),
  "T cell" = c(0, 1, 2, 6, 7, 8)
))

# Cluster 11: 2821 cells, 5 communities.
c11 <- annotate_subclusters(c11, list(
  "fibroblast" = c(1, 3),
  "smooth muscle cell" = c(0, 2, 4)
))

# Cluster 13: 2418 cells, 6 communities.
c13 <- annotate_subclusters(c13, list(
  "smooth muscle cell" = 5,
  "keratinocyte" = c(0, 1, 2, 3, 4)
))

# Cluster 18: 2233 cells, 10 communities.
c18 <- annotate_subclusters(c18, list(
  "fibroblast" = 8,
  "T cell" = 3,
  "keratinocyte" = c(0, 1, 2, 4, 5, 6, 7, 9)
))

# Cluster 20: 1090 cells, 5 communities.
c20 <- annotate_subclusters(c20, list(
  "fibroblast" = 0,
  "T cell" = c(1, 2, 3, 4)
))

# Cluster 22: 993 cells, 5 communities.
c22 <- annotate_subclusters(c22, list(
  "melanocyte" = c(1, 3),
  "keratinocyte" = c(0, 2, 4)
))

# Cluster 25: 748 cells, 4 communities.
c25 <- annotate_subclusters(c25, list(
  "fibroblast" = 2,
  "Schwann cell" = c(0, 1, 3)
))

# Cluster 31: 267 cells, 4 communities.
c31 <- annotate_subclusters(c31, list(
  "keratinocyte" = c(0, 2),
  "Langerhans cell" = c(1, 3)
))
# First-pass annotation of the merged object.
# Step 1: clusters that were labelled as a whole.
merged_obj$first_annotation <- "unknown"
merged_obj$first_annotation[merged_obj$seurat_clusters %in% c(4, 7, 8, 12, 14, 15, 17, 19, 21, 24, 27, 30, 32)] <- "keratinocyte"
merged_obj$first_annotation[merged_obj$seurat_clusters %in% c(2, 3, 9, 16)] <- "fibroblast"
merged_obj$first_annotation[merged_obj$seurat_clusters %in% c(5, 28)] <- "endothelial cell"
merged_obj$first_annotation[merged_obj$seurat_clusters %in% c(29)] <- "adipocyte"
merged_obj$first_annotation[merged_obj$seurat_clusters %in% c(1, 26)] <- "T cell"
merged_obj$first_annotation[merged_obj$seurat_clusters %in% c(23)] <- "mast cell"
merged_obj$first_annotation[merged_obj$seurat_clusters %in% c(10)] <- "macrophage"

# Step 2: overlay the labels of the re-clustered subsets. Each subset only
# covers part of the cells, so its labels are written to exactly those cells
# (located by cell name) rather than relying on how `$<-` treats a partial
# named vector; all other cells keep the value from step 1.
for (refined in list(c31, c25, c22, c20, c18, c13, c11, c6)) {
  target_cells <- match(colnames(refined), colnames(merged_obj))
  merged_obj$first_annotation[target_cells] <- refined$annotation
}

table(merged_obj$first_annotation)
## 
##          adipocyte   endothelial cell         fibroblast       keratinocyte 
##                355               4285              19123              30562 
##    Langerhans cell         macrophage          mast cell         melanocyte 
##                119               4033                948                427 
##       Schwann cell smooth muscle cell             T cell 
##                571               1944              12269
DimPlot(merged_obj, reduction = "umap", pt.size = 0.25, alpha = 0.2, label = TRUE, label.size = 2, repel = TRUE, cols = color, group.by = "first_annotation")

# Coarser labels: the four immune sub-types are pooled into "immune cell";
# every other first-pass label is carried over unchanged.
immune_subtypes <- c("Langerhans cell", "macrophage", "mast cell", "T cell")
is_immune <- merged_obj$first_annotation %in% immune_subtypes
merged_obj$major_celltype <- merged_obj$first_annotation
merged_obj$major_celltype[is_immune] <- "immune cell"
table(merged_obj$major_celltype)
## 
##          adipocyte   endothelial cell         fibroblast        immune cell 
##                355               4285              19123              17369 
##       keratinocyte         melanocyte       Schwann cell smooth muscle cell 
##              30562                427                571               1944
DimPlot(
  merged_obj,
  reduction = "umap",
  pt.size = 0.25,
  alpha = 0.2,
  label = TRUE,
  label.size = 2,
  repel = TRUE,
  cols = color,
  group.by = "major_celltype"
)

# Set the patient label for sample SSc213.
is_ssc213 <- merged_obj$SampleId == "SSc213"
merged_obj$Patient[is_ssc213] <- "Patient2"

# Collapse the post-CART time points into three bins; any other Group value
# is kept as it is.
timepoint_bins <- c(
  "SSc_postCART_1m" = "SSc_postCART_early",
  "SSc_postCART_2m" = "SSc_postCART_early",
  "SSc_postCART_4m" = "SSc_postCART_late",
  "SSc_postCART_6m" = "SSc_postCART_late",
  "SSc_postCART_9m" = "SSc_postCART_superlate",
  "SSc_postCART_1y" = "SSc_postCART_superlate"
)
needs_binning <- merged_obj$Group %in% names(timepoint_bins)
merged_obj$fourgroups <- merged_obj$Group
merged_obj$fourgroups[needs_binning] <-
  unname(timepoint_bins[as.character(merged_obj$Group[needs_binning])])

table(merged_obj$fourgroups)
## 
##     SSc_postCART_early      SSc_postCART_late SSc_postCART_superlate 
##                  25178                  13618                  12053 
##            SSc_preCART 
##                  23787
# Checkpoint the manually annotated object and read it straight back.
saveRDS(merged_obj, "/Users/xu/Desktop/1_project/merged_obj_manually.rds")
merged_obj <- readRDS("/Users/xu/Desktop/1_project/merged_obj_manually.rds")

# Object carrying the automatic per-cell labels in `scimilarity_hint`.
autodata <- readRDS("/Users/xu/Desktop/CART/data/annotation/major celltype/mergeddata/Erlangen_CART_XeniumScimilarity250320112810.rds")

# The original call had a stray empty argument (`repel = TRUE,,group.by`),
# which R binds positionally to the next free formal; it is removed here.
DimPlot(autodata, reduction = "umap.unintegrated", pt.size = 0.25, alpha = 0.2, label = TRUE, label.size = 5, repel = TRUE, group.by = "scimilarity_hint")

table(autodata$scimilarity_hint)
## 
##                               B cell      CD4-positive, alpha-beta T cell 
##                                  379                                 4371 
##      CD8-positive, alpha-beta T cell                      Langerhans cell 
##                                 1179                                   45 
##                           basal cell        blood vessel endothelial cell 
##                                23310                                   27 
##           capillary endothelial cell                   classical monocyte 
##                                 3427                                 1819 
## endothelial cell of lymphatic vessel                          erythrocyte 
##                                  444                                    4 
##                           fibroblast                           glial cell 
##                                19761                                  307 
##                 innate lymphoid cell                         keratinocyte 
##                                   45                                 3548 
##                           macrophage                            mast cell 
##                                 5367                                  835 
##                           melanocyte                   myofibroblast cell 
##                                  628                                 3224 
##                  natural killer cell                           neutrophil 
##                                  666                                  359 
##                          plasma cell          plasmacytoid dendritic cell 
##                                  799                                 1011 
##                             platelet                      progenitor cell 
##                                   25                                  869 
##                    regulatory T cell                   smooth muscle cell 
##                                 1178                                 1009
# Distinct automatic labels, in order of first appearance.
unique(autodata$scimilarity_hint)
##  [1] basal cell                           keratinocyte                        
##  [3] melanocyte                           macrophage                          
##  [5] fibroblast                           endothelial cell of lymphatic vessel
##  [7] myofibroblast cell                   mast cell                           
##  [9] capillary endothelial cell           classical monocyte                  
## [11] regulatory T cell                    progenitor cell                     
## [13] CD4-positive, alpha-beta T cell      Langerhans cell                     
## [15] platelet                             smooth muscle cell                  
## [17] glial cell                           neutrophil                          
## [19] natural killer cell                  B cell                              
## [21] plasma cell                          blood vessel endothelial cell       
## [23] plasmacytoid dendritic cell          CD8-positive, alpha-beta T cell     
## [25] innate lymphoid cell                 erythrocyte                         
## 26 Levels: B cell ... smooth muscle cell
# Fine-grained automatic labels grouped by the major cell type they collapse
# into. Labels that have no broader category map to themselves.
scimilarity_groups <- list(
  "keratinocyte" = c("basal cell", "keratinocyte"),
  "fibroblast" = c("fibroblast", "myofibroblast cell"),
  "immune cell" = c(
    "macrophage",
    "classical monocyte",
    "regulatory T cell",
    "CD4-positive, alpha-beta T cell",
    "CD8-positive, alpha-beta T cell",
    "Langerhans cell",
    "mast cell",
    "natural killer cell",
    "B cell",
    "plasma cell",
    "neutrophil",
    "innate lymphoid cell",
    "plasmacytoid dendritic cell"
  ),
  "platelet" = "platelet",
  "progenitor cell" = "progenitor cell",
  "erythrocyte" = "erythrocyte",
  "endothelial cell" = c(
    "capillary endothelial cell",
    "blood vessel endothelial cell",
    "endothelial cell of lymphatic vessel"
  ),
  "Schwann cell" = "glial cell",
  "melanocyte" = "melanocyte",
  "smooth muscle cell" = c(
    "smooth muscle cell",
    "vascular associated smooth muscle cell"
  )
)

# Flatten into a named character vector: names are the fine-grained labels,
# values the major cell types. It is only ever indexed by name.
category_mapping <- setNames(
  rep(names(scimilarity_groups), times = lengths(scimilarity_groups)),
  unlist(scimilarity_groups, use.names = FALSE)
)
# Translate the automatic labels into major cell types.
# The labels are converted to character first so that indexing
# `category_mapping` goes by label name, not by factor code.
autodata$scimilarity_hint <- as.character(autodata$scimilarity_hint)

# Look every cell up by name; unname() keeps the lookup keys from being
# stored as element names on the metadata column.
autodata@meta.data$major_scimilarity_hint <-
  unname(category_mapping[autodata@meta.data$scimilarity_hint])

# Labels that are missing from the mapping come back as NA.
autodata@meta.data$major_scimilarity_hint[is.na(autodata@meta.data$major_scimilarity_hint)] <- "unknown"
table(autodata@meta.data$major_scimilarity_hint)
## 
##   endothelial cell        erythrocyte         fibroblast        immune cell 
##               3898                  4              22985              18053 
##       keratinocyte         melanocyte           platelet    progenitor cell 
##              26858                628                 25                869 
##       Schwann cell smooth muscle cell 
##                307               1009

# The stray empty argument (`repel = TRUE,,group.by`) of the original call is
# removed.
DimPlot(autodata, reduction = "umap.unintegrated", pt.size = 0.25, label = TRUE, label.size = 5, repel = TRUE, group.by = "major_scimilarity_hint")

# Bring the manual major cell types over so both annotations sit on `autodata`.
autodata$major_celltype <- merged_obj$major_celltype

table(autodata$major_celltype)
## 
##          adipocyte   endothelial cell         fibroblast        immune cell 
##                355               4285              19123              17369 
##       keratinocyte         melanocyte       Schwann cell smooth muscle cell 
##              30562                427                571               1944

# TRUE where the manual and the automatic major cell type disagree.
diff_annotations <- autodata$major_celltype != autodata$major_scimilarity_hint

# Keep only the disagreeing cells for re-annotation.
differing_cells <- which(diff_annotations)
data_diff <- subset(autodata, cells = differing_cells)
table(diff_annotations)
## diff_annotations
## FALSE  TRUE 
## 63725 10911

# Flag the disagreeing cells and show them on the embedding.
autodata$highlight <- c("same", "differ")[diff_annotations + 1L]
DimPlot(autodata, group.by = "highlight") +
  ggtitle("UMAP with differing annotations") +
  scale_color_manual(values = c("differ" = "#866aa3", "same" = "grey")) +
  theme_minimal() +
  theme(
    panel.grid = element_blank(),
    plot.title = element_text(face = "bold", size = 16)
  )

# Re-run PCA on the disagreeing cells only, using every gene.
all.genes <- rownames(data_diff)
data_diff <- RunPCA(data_diff, features = all.genes)
ElbowPlot(data_diff, ndims = 50, reduction = "pca")

# Same graph / clustering / UMAP settings as for the full object.
data_diff <- data_diff %>%
  FindNeighbors(dims = 1:30) %>%
  FindClusters(resolution = 1, algorithm = 4) %>%
  RunUMAP(dims = 1:30)

DimPlot(
  data_diff,
  reduction = "umap",
  pt.size = 0.25,
  label = TRUE,
  label.size = 3,
  repel = TRUE,
  cols = color
)

# Create one object per `data_diff` cluster, named cl<cluster id>
# (cl1, cl2, ...), for the per-cluster re-annotation below.
cluster_labels <- as.character(unique(data_diff$seurat_clusters))
for (cluster_id in cluster_labels) {
  cluster_subset <- subset(data_diff, subset = seurat_clusters == cluster_id)
  assign(x = paste0("cl", cluster_id), value = cluster_subset)
}
# Each mixed `data_diff` cluster is re-clustered and its sub-clusters are
# labelled in `finalannotation`; sub-clusters that are not listed stay
# "Unknown". The sub-cluster ids were chosen against the recorded run, and the
# counts quoted are the ones printed in that run.

# cl1: 1192 cells, 7 communities -> fibroblast 448, smooth muscle cell 744.
cl1 <- FindClusters(cl1, resolution = 0.8)
cl1$finalannotation <- case_when(
  cl1$seurat_clusters %in% c(0, 6) ~ "fibroblast",
  cl1$seurat_clusters %in% c(1, 2, 3, 4, 5) ~ "smooth muscle cell",
  TRUE ~ "Unknown"
)
table(cl1$finalannotation)

# cl2: 1163 cells, 5 communities -> fibroblast 674, keratinocyte 489.
cl2 <- FindClusters(cl2, resolution = 0.8)
cl2$finalannotation <- case_when(
  cl2$seurat_clusters %in% c(1, 2, 4) ~ "fibroblast",
  cl2$seurat_clusters %in% c(0, 3) ~ "keratinocyte",
  TRUE ~ "Unknown"
)
table(cl2$finalannotation)

# cl3: 1063 cells, 8 communities -> fibroblast 493, immune cell 570.
cl3 <- FindClusters(cl3, resolution = 1.2)
cl3$finalannotation <- case_when(
  cl3$seurat_clusters %in% c(0, 2, 3) ~ "fibroblast",
  cl3$seurat_clusters %in% c(1, 4, 5, 6, 7) ~ "immune cell",
  TRUE ~ "Unknown"
)
table(cl3$finalannotation)

# cl5: 891 cells, 7 communities -> adipocyte 685, keratinocyte 206.
cl5 <- FindClusters(cl5, resolution = 1.2)
cl5$finalannotation <- case_when(
  cl5$seurat_clusters %in% c(3, 5, 6) ~ "keratinocyte",
  cl5$seurat_clusters %in% c(0, 1, 2, 4) ~ "adipocyte",
  TRUE ~ "Unknown"
)
table(cl5$finalannotation)

# cl6: 883 cells, 6 communities -> fibroblast 203, immune cell 680.
cl6 <- FindClusters(cl6, resolution = 1.2)
cl6$finalannotation <- case_when(
  cl6$seurat_clusters %in% c(1, 2, 3, 4, 5) ~ "immune cell",
  cl6$seurat_clusters %in% c(0) ~ "fibroblast",
  TRUE ~ "Unknown"
)
table(cl6$finalannotation)

# cl9: 554 cells, 7 communities -> endothelial cell 343, fibroblast 211.
cl9 <- FindClusters(cl9, resolution = 1.2)
cl9$finalannotation <- case_when(
  cl9$seurat_clusters %in% c(1, 3) ~ "fibroblast",
  cl9$seurat_clusters %in% c(0, 2, 4, 5, 6) ~ "endothelial cell",
  TRUE ~ "Unknown"
)
table(cl9$finalannotation)

# cl10: 518 cells, 6 communities -> fibroblast 414, smooth muscle cell 104.
cl10 <- FindClusters(cl10, resolution = 1.2)
cl10$finalannotation <- case_when(
  cl10$seurat_clusters %in% c(1, 2, 3, 4, 5) ~ "fibroblast",
  cl10$seurat_clusters %in% c(0) ~ "smooth muscle cell",
  TRUE ~ "Unknown"
)
table(cl10$finalannotation)

# cl13: 353 cells, 5 communities -> keratinocyte 183, smooth muscle cell 170.
cl13 <- FindClusters(cl13, resolution = 1.2)
cl13$finalannotation <- case_when(
  cl13$seurat_clusters %in% c(0, 4) ~ "smooth muscle cell",
  cl13$seurat_clusters %in% c(1, 2, 3) ~ "keratinocyte",
  TRUE ~ "Unknown"
)
table(cl13$finalannotation)

# cl16: 185 cells, 6 communities -> immune cell 153, keratinocyte 32.
cl16 <- FindClusters(cl16, resolution = 1.2)
cl16$finalannotation <- case_when(
  cl16$seurat_clusters %in% c(0, 1, 2, 4, 5) ~ "immune cell",
  cl16$seurat_clusters %in% c(3) ~ "keratinocyte",
  TRUE ~ "Unknown"
)
table(cl16$finalannotation)

# cl17: 179 cells, 4 communities -> fibroblast 59, smooth muscle cell 120.
cl17 <- FindClusters(cl17, resolution = 1.2)
cl17$finalannotation <- case_when(
  cl17$seurat_clusters %in% c(1) ~ "fibroblast",
  cl17$seurat_clusters %in% c(0, 2, 3) ~ "smooth muscle cell",
  TRUE ~ "Unknown"
)
table(cl17$finalannotation)

# cl18: 132 cells, 4 communities -> fibroblast 105, keratinocyte 27.
cl18 <- FindClusters(cl18, resolution = 1.2)
cl18$finalannotation <- case_when(
  cl18$seurat_clusters %in% c(0, 1, 3) ~ "fibroblast",
  cl18$seurat_clusters %in% c(2) ~ "keratinocyte",
  TRUE ~ "Unknown"
)
table(cl18$finalannotation)

# cl19: 112 cells, 6 communities -> fibroblast 53, immune cell 59.
cl19 <- FindClusters(cl19, resolution = 1.2)
cl19$finalannotation <- case_when(
  cl19$seurat_clusters %in% c(0, 3) ~ "fibroblast",
  cl19$seurat_clusters %in% c(1, 2, 4) ~ "immune cell",
  TRUE ~ "Unknown"
)
table(cl19$finalannotation)
# Final labels for the disagreeing cells.
# Step 1: clusters that were labelled as a whole. The placeholder was
# misspelled "unkown" in the original; it now matches the "unknown" used for
# `first_annotation`.
data_diff$finalannotation <- "unknown"
data_diff$finalannotation[data_diff$seurat_clusters %in% c(4, 7, 8, 14, 15)] <- "keratinocyte"
data_diff$finalannotation[data_diff$seurat_clusters == 12] <- "adipocyte"
data_diff$finalannotation[data_diff$seurat_clusters == 11] <- "Schwann cell"

# Step 2: overlay the labels of the re-clustered subsets. Each subset covers
# only part of the cells, so its labels are written to exactly those cells
# (located by cell name) instead of relying on how `$<-` treats a partial
# named vector; all other cells keep the value from step 1.
for (refined in list(cl1, cl2, cl3, cl5, cl6, cl9, cl10, cl13, cl16, cl17, cl18, cl19)) {
  target_cells <- match(colnames(refined), colnames(data_diff))
  data_diff$finalannotation[target_cells] <- refined$finalannotation
}

table(data_diff$finalannotation)
## 
##          adipocyte   endothelial cell         fibroblast        immune cell 
##               1041                343               2660               1462 
##       keratinocyte       Schwann cell smooth muscle cell 
##               3895                372               1138
DimPlot(data_diff, reduction = "umap", pt.size = 0.25, label = TRUE, label.size = 5, repel = TRUE, cols = color, group.by = "finalannotation")

####
# Re-annotated labels of the disagreeing cells, keyed by cell name.
reannotated_data <- data_diff@meta.data$finalannotation
names(reannotated_data) <- rownames(data_diff@meta.data)

# Cells of `autodata` that received a new label.
matching_cells <- intersect(rownames(autodata@meta.data), names(reannotated_data))

# Sanity check: prints TRUE when every re-annotated cell was found in
# `autodata`. (The original identical() check compared a vector's names with
# the very index used to select them, so it could never be FALSE.)
all(names(reannotated_data) %in% matching_cells)

# Write the new labels to their own rows, located by cell name, so that the
# alignment does not depend on a logical mask and `matching_cells` happening
# to be in the same order.
target_rows <- match(matching_cells, rownames(autodata@meta.data))
autodata$major_celltype[target_rows] <- reannotated_data[matching_cells]
table(autodata$major_celltype)
## 
##          adipocyte   endothelial cell         fibroblast        immune cell 
##               1041               3828              19878              16917 
##       keratinocyte         melanocyte       Schwann cell smooth muscle cell 
##              30359                370                633               1610
# Carry the reconciled major cell types back onto the merged object.
merged_obj[["merged_annotation"]] <- autodata$major_celltype
table(merged_obj$merged_annotation)
## 
##          adipocyte   endothelial cell         fibroblast        immune cell 
##               1041               3828              19878              16917 
##       keratinocyte         melanocyte       Schwann cell smooth muscle cell 
##              30359                370                633               1610

# First-pass labels, printed again for comparison.
table(merged_obj$first_annotation)
## 
##          adipocyte   endothelial cell         fibroblast       keratinocyte 
##                355               4285              19123              30562 
##    Langerhans cell         macrophage          mast cell         melanocyte 
##                119               4033                948                427 
##       Schwann cell smooth muscle cell             T cell 
##                571               1944              12269
DimPlot(
  merged_obj,
  reduction = "umap",
  pt.size = 0.25,
  label = TRUE,
  label.size = 5,
  repel = TRUE,
  group.by = "merged_annotation"
)

#
# Refine the cells currently labelled "adipocyte": re-cluster them and split
# them into keratinocytes and true adipocytes.
cladipo <- subset(merged_obj, merged_annotation == "adipocyte")
cladipo <- FindClusters(cladipo, resolution = 1.2)
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
## 
## Number of nodes: 1041
## Number of edges: 32269
## 
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.6624
## Number of communities: 12
## Elapsed time: 0 seconds
cladipo$finalannotation <- "Unknown"
cladipo$finalannotation[cladipo$seurat_clusters %in% c(0, 1, 3)] <- "keratinocyte"
cladipo$finalannotation[cladipo$seurat_clusters %in% c(2, 4, 5)] <- "adipocyte"
table(cladipo$finalannotation)
## 
##    adipocyte keratinocyte 
##          361          680

# Only sub-clusters 0-5 are labelled. If a rerun yields further sub-clusters,
# their cells would be written back as "Unknown", so say so.
n_unlabelled <- sum(cladipo$finalannotation == "Unknown")
if (n_unlabelled > 0) {
  warning(
    n_unlabelled,
    " cell(s) of the adipocyte subset sit in sub-clusters without a label ",
    "and are written back as \"Unknown\"",
    call. = FALSE
  )
}

# Write the refined labels to exactly the subset's cells, located by cell
# name; all other cells keep their current `merged_annotation`.
target_cells <- match(colnames(cladipo), colnames(merged_obj))
merged_obj$merged_annotation[target_cells] <- cladipo$finalannotation
table(merged_obj$merged_annotation)
## 
##          adipocyte   endothelial cell         fibroblast        immune cell 
##                361               3828              19878              16917 
##       keratinocyte         melanocyte       Schwann cell smooth muscle cell 
##              31039                370                633               1610
####
# Refine the cells currently labelled "smooth muscle cell": re-cluster them
# and separate fibroblasts, pericytes and immune cells.
clsmc <- subset(merged_obj, merged_annotation == "smooth muscle cell")
clsmc <- FindClusters(clsmc, resolution = 1.2)
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
## 
## Number of nodes: 1610
## Number of edges: 45980
## 
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.6726
## Number of communities: 26
## Elapsed time: 0 seconds
clsmc$finalannotation <- "Unknown"
clsmc$finalannotation[clsmc$seurat_clusters %in% c(7)] <- "fibroblast"
clsmc$finalannotation[clsmc$seurat_clusters %in% c(6)] <- "pericyte"
clsmc$finalannotation[clsmc$seurat_clusters %in% c(8, 10)] <- "immune cell"
clsmc$finalannotation[clsmc$seurat_clusters %in% c(0, 1, 2, 3, 4, 5, 9)] <- "smooth muscle cell"
table(clsmc$finalannotation)
## 
##         fibroblast        immune cell           pericyte smooth muscle cell 
##                 97                  5                101               1407

# Only sub-clusters 0-10 are labelled. If a rerun yields further sub-clusters,
# their cells would be written back as "Unknown", so say so.
n_unlabelled <- sum(clsmc$finalannotation == "Unknown")
if (n_unlabelled > 0) {
  warning(
    n_unlabelled,
    " cell(s) of the smooth-muscle subset sit in sub-clusters without a ",
    "label and are written back as \"Unknown\"",
    call. = FALSE
  )
}

# Write the refined labels to exactly the subset's cells, located by cell
# name; all other cells keep their current `merged_annotation`.
target_cells <- match(colnames(clsmc), colnames(merged_obj))
merged_obj$merged_annotation[target_cells] <- clsmc$finalannotation
table(merged_obj$merged_annotation)
## 
##          adipocyte   endothelial cell         fibroblast        immune cell 
##                361               3828              19975              16922 
##       keratinocyte         melanocyte           pericyte       Schwann cell 
##              31039                370                101                633 
## smooth muscle cell 
##               1407
#############
# Fold the melanocyte label into "keratinocyte".
is_melanocyte <- merged_obj$merged_annotation == "melanocyte"
merged_obj$merged_annotation[is_melanocyte] <- "keratinocyte"
table(merged_obj$merged_annotation)
## 
##          adipocyte   endothelial cell         fibroblast        immune cell 
##                361               3828              19975              16922 
##       keratinocyte           pericyte       Schwann cell smooth muscle cell 
##              31409                101                633               1407
DimPlot(
  merged_obj,
  reduction = "umap",
  pt.size = 0.25,
  label = TRUE,
  label.size = 5,
  repel = TRUE,
  cols = color,
  group.by = "merged_annotation"
)

# Marker genes shown in the dot plot, in display order.
# "FABP4" carried a trailing space in the original ("FABP4 "), so it could
# never match a feature name; the space is removed.
majormarkergene <- c(
  "KRT5", "KRT19", "KRT20",
  "COL1A1", "COL1A2", "LUM",
  "VWF", "ERG", "PECAM1",
  "CD3E", "CD68", "CD19",
  "MLANA", "KIT", "MITF",
  "MYL9", "TAGLN", "MYH11",
  "SOX10", "GLDN", "MPZ",
  "PDGFRA", "PDGFRB", "RGS5",
  "PPARG", "ADIPOQ", "FABP4", "PLIN1", "PLIN4"
)

metadata <- merged_obj@meta.data

# NOTE: this reassigns `color`, replacing the multi-colour palette defined at
# the top of the script with the two-colour scale handed to DotPlot().
color <- c('#E5D2DD', '#53A85F')

# DotPlot() is used only to compute the per-group statistics; the figure
# itself is drawn by the ggplot() call below. (`split.by = "group"` was left
# commented out in the original call.)
p <- DotPlot(
  merged_obj,
  features = unique(majormarkergene),
  cols = color,
  group.by = "merged_annotation"
) + coord_flip()
exp <- p$data

# Fix the display order of genes and groups to their order of appearance.
exp$features.plot <- fct_inorder(as.factor(exp$features.plot))
exp$id <- fct_inorder(as.factor(exp$id))

# Rename by column name rather than by position (the original renamed
# columns 5 and 2), so a change in column order cannot relabel the wrong one.
names(exp)[names(exp) == "avg.exp.scaled"] <- "AverageExpression"
names(exp)[names(exp) == "pct.exp"] <- "Percent"

# Dotted separators half-way between neighbouring groups on the x axis.
# seq_len() yields an empty vector when there is a single group, where
# seq(1.5, 0.5, by = 1) would raise an error.
cell_types <- levels(exp$id)
xintercepts <- seq_len(max(length(cell_types) - 1L, 0L)) + 0.5

ggplot(exp, aes(x = id, y = features.plot)) +
  # Filled dots: size = percent expressing, colour = average expression.
  geom_point(aes(size = Percent, color = AverageExpression)) +
  # Black outline drawn over each dot.
  geom_point(aes(size = Percent, color = AverageExpression), shape = 21, color = "black", stroke = 1) +
  scale_size_continuous(range = c(2, 6)) +
  theme(
    panel.grid = element_blank(),
    axis.line = element_blank(),
    axis.ticks = element_blank(),
    # `linewidth` replaces `size`, which is deprecated for lines and borders.
    panel.background = element_rect(color = 'black', linewidth = 1.2, fill = 'transparent'),
    axis.text.x = element_text(size = 11, color = "black", angle = 45, hjust = 1),
    axis.text.y = element_text(size = 11, color = "black")
  ) +
  scale_color_gradientn(colors = colorRampPalette(c("#c1719c", "#d9bad0", "#d0ecd7", "#a3e0bb", "#4fc3b0"))(100)) +
  labs(x = NULL, y = NULL) +
  geom_vline(xintercept = xintercepts, linetype = "dotted", linewidth = 0.8)

# Keep the fine-grained automatic label next to the manual ones, then write
# the fully annotated object to disk.
merged_obj[["automaticalannotation"]] <- autodata$scimilarity_hint
saveRDS(object = merged_obj, file = "/Users/xu/Desktop/1_project/merged_obj.rds")